# -*- coding: utf-8 -*-
import scrapy
import os,time,pymongo
from lxml import etree  # lxml etree for XPath extraction (imported but unused in this file)
from scrapy.http import Request  # Request class for following URLs (unused; scrapy.Request is used below)
from mywed.myfirst.myfirst.items import MyfirstItem  # item class defining the fields to be saved
print(os.getcwd())


class HshSpider(scrapy.Spider):
    """Crawl Tencent HR job postings (category tid=87).

    The listing is paginated through the ``start`` query parameter,
    10 rows per page: start=0, 10, 20, ... 980 (99 pages total).
    """

    name = 'hsh'
    allowed_domains = ['tencent.com']
    start_urls = ['http://hr.tencent.com/position.php?tid=87&start=0#a']

    # 98 follow-up pages after the first one (start=10 .. start=980),
    # matching the original range(98) pagination loop.
    _EXTRA_PAGES = 98
    _PAGE_SIZE = 10

    def start_requests(self):
        """Yield one request per listing page, each exactly once.

        Fix: the original code re-yielded all 98 pagination URLs from
        EVERY parse() call, so every crawled page flooded the scheduler
        with the same 98 duplicate requests (silently absorbed by the
        dupefilter).  Generating the full page list here issues each
        request a single time.
        """
        for url in self.start_urls:
            yield scrapy.Request(url, callback=self.parse)
        for page in range(1, self._EXTRA_PAGES + 1):
            start = page * self._PAGE_SIZE
            url = 'http://hr.tencent.com/position.php?tid=87&start=' + str(start) + '#a'
            yield scrapy.Request(url, callback=self.parse)

    def parse(self, response):
        """Extract one MyfirstItem per job row on a listing page.

        Yields:
            MyfirstItem with link/name/type/number/address/time fields,
            each an ``extract()`` list of strings.
        """
        # NOTE(review): only rows with class="even" are selected; if the
        # table alternates even/odd row classes, half the rows are being
        # skipped — confirm against the live markup before "fixing".
        for row in response.xpath('//tr[@class="even"]'):
            item = MyfirstItem()
            item['position_link'] = row.xpath('./td/a/@href').extract()
            item['position_name'] = row.xpath('./td/a/text()').extract()
            item['position_type'] = row.xpath('./td[2]/text()').extract()
            item['position_number'] = row.xpath('./td[3]/text()').extract()
            item['position_address'] = row.xpath('./td[4]/text()').extract()
            item['position_time'] = row.xpath('./td[5]/text()').extract()
            yield item

        













